# Kmart regressor matrix: `ints` followed by four market covariates from jiadat
# (`ints` is defined outside this view; presumably the intercept column)
Kxmat <- cbind(
  ints,
  jiadat$population,
  jiadat$SPC,
  jiadat$urban,
  jiadat$MidWest
)
# Entry outcomes used as dependent variables
WalMart <- jiadat$WalMart
Kmart <- jiadat$Kmart
# Define NPL Negative log Likelihood
# Pseudo negative log-likelihood minimised at each NPL step.
#
# Arguments:
#   theta       numeric(9). theta[1] and theta[2] multiply the first column of
#               Wxmat and Kxmat respectively, theta[3:5] are shared by both
#               players, theta[6:7] are Wal-Mart-only, theta[8] is Kmart-only
#               and theta[9] is the log of the competitive effect.
#   npl.pold.K  current Kmart entry probabilities (one per market).
#   npl.pold.W  current Wal-Mart entry probabilities (one per market).
#
# Reads Wxmat, Kxmat, WalMart and Kmart from the enclosing environment.
#
# Returns the scalar negative pseudo log-likelihood.
npl.nll <- function(theta, npl.pold.K, npl.pold.W) {
  # Player-specific coefficient vectors
  theta.W <- c(theta[1], theta[3:5], theta[6:7])
  theta.K <- c(theta[2], theta[3:5], theta[8])
  # exp() keeps delta positive, so a rival's entry probability always enters
  # the profit index with a negative sign
  delta <- exp(theta[9])

  # Probit index: deterministic profit minus the expected competitive effect
  index.W <- Wxmat %*% theta.W - delta * npl.pold.K
  index.K <- Kxmat %*% theta.K - delta * npl.pold.W

  # Evaluate log P(enter) and log P(stay out) directly on the log scale.
  # log(pnorm(x)) and log(1 - pnorm(x)) underflow to log(0) for large |x|,
  # which turns 0 * -Inf into NaN and makes optim() abort.
  ll <- sum(
    WalMart * pnorm(index.W, log.p = TRUE) +
      (1 - WalMart) * pnorm(index.W, lower.tail = FALSE, log.p = TRUE) +
      Kmart * pnorm(index.K, log.p = TRUE) +
      (1 - Kmart) * pnorm(index.K, lower.tail = FALSE, log.p = TRUE)
  )
  -ll
}
# NPL is a plain iteration on the choice probabilities.
# Initial CCPs: seeded with the observed entry outcomes
# (could start anywhere)
npl.pold.K <- Kmart
npl.pold.W <- WalMart
# Main NPL loop
# Iterate the nested pseudo-likelihood (NPL) estimator.
#
# Each pass minimises npl.nll() with the rival entry probabilities held fixed,
# then recomputes those probabilities from the new estimate. Iteration stops
# when the squared change in probabilities is at or below err.tol, or after
# tot.reps passes.
#
# Arguments:
#   theta       numeric(9) starting values, laid out as npl.nll() expects.
#   npl.pold.K  starting Kmart entry probabilities.
#   npl.pold.W  starting Wal-Mart entry probabilities.
#   err.tol     convergence tolerance on the squared probability change.
#   quiet       if TRUE, suppress the per-iteration progress output.
#   tot.reps    maximum number of passes (2000 was the previous fixed cap).
#
# Reads Wxmat and Kxmat from the enclosing environment.
#
# Returns the optim() result of the last pass (NULL if no pass was run).
doNPLloop <- function(theta, npl.pold.K, npl.pold.W, err.tol = 1E-8,
                      quiet = FALSE, tot.reps = 2000) {
  # Start from Inf so at least one pass runs whatever err.tol is; with a
  # finite start a large err.tol skipped the loop and left no result
  err <- Inf
  npl.reps <- 0
  npl.res <- NULL
  while (err > err.tol && npl.reps < tot.reps) {
    npl.reps <- npl.reps + 1
    # Minimise the pseudo negative log-likelihood given the current CCPs
    kth.res <- optim(theta, npl.nll,
      control = list(maxit = 10000), method = "BFGS",
      npl.pold.K = npl.pold.K, npl.pold.W = npl.pold.W
    )
    npl.res <- kth.res
    # Update parameters
    theta <- kth.res$par
    npl.theta.W <- c(theta[1], theta[3:5], theta[6:7])
    npl.theta.K <- c(theta[2], theta[3:5], theta[8])
    npl.delta <- exp(theta[9]) # exp() keeps the competitive effect positive
    # Deterministic component of profits
    npl.pi.W <- Wxmat %*% npl.theta.W
    npl.pi.K <- Kxmat %*% npl.theta.K
    # Probabilities implied by the old CCPs
    npl.pnew.W <- pnorm(npl.pi.W - npl.delta * npl.pold.K)
    npl.pnew.K <- pnorm(npl.pi.K - npl.delta * npl.pold.W)
    # Acceleration trick of Kasahara and Shimotsu: one more update, each
    # player responding to the freshest rival probabilities
    npl.pnew.W <- pnorm(npl.pi.W - npl.delta * npl.pnew.K)
    npl.pnew.K <- pnorm(npl.pi.K - npl.delta * npl.pnew.W)
    # Half the squared distance ||Pnew - Pold||^2, as a plain scalar
    step.K <- npl.pnew.K - npl.pold.K
    step.W <- npl.pnew.W - npl.pold.W
    err <- (sum(step.K^2) + sum(step.W^2)) / 2
    if (!is.finite(err)) {
      stop("NPL update produced non-finite choice probabilities", call. = FALSE)
    }
    # Update probabilities
    npl.pold.W <- npl.pnew.W
    npl.pold.K <- npl.pnew.K
    if (!quiet) {
      cat("NPL Estimator: ", npl.reps, " iterations completed \n")
      cat(" Current Error: ", err, "\n \n")
    }
  }
  # Report the outcome once, after the loop has finished
  if (err <= err.tol) {
    cat("Algorithm converged. \n")
  } else {
    cat("The NPL algorithm did not converge.\n", "Try increasing the number of iterations \n or lowering the tolerance. \n")
  }
  npl.res
}
# Start from the same values as the other algorithms
theta <- c(
  -4.8960054, -15.3832747, 1.6653983, 0.9289151, 2.4720639,
  -1.4612322, 2.0311288, 2.1255131, 0.6889679
)
NPL.res <- doNPLloop(theta, npl.pold.K, npl.pold.W)
# theta[9] is log(delta), so map it back to the level
delta.NPL <- exp(NPL.res$par[9])
# There is a typo in the paper regarding the estimate of delta: it is printed as 1.6-something rather than 1.16-something
# Standard Errors
# The steps follow the 2-step bootstrap
# We leave this as an exercise to the reader! :)
# Ordered probit fits with MASS::polr().
# MASS must be attached before the first polr() call; library() fails loudly
# if the package is missing, whereas require() only returns FALSE.
library(MASS)
# Show where polr is defined
getAnywhere(polr)
# NOTE(review): presumably this file provides characteristics_five and
# characteristics_one -- confirm
load("C:/Users/byontche/Dropbox/Slovakia/R_files_health_care/sarprobit_dentists_2001_new.RData")
# First specification (1993/95 covariates); overwritten by the fit below
probit_taxi <- polr(as.factor(pharm_7) ~ ln_pop_93 + wage_95 + unemp_95 + young_93 + old_93, data = characteristics_five, method = "probit")
# Second specification (2001 covariates). The argument is spelled Hess with a
# capital H; a lower-case hess is not an argument of polr().
probit_taxi <- polr(as.factor(pharm_7) ~ ln_pop_01 + wage_01 + unemp_01 + young_01 + old_01, data = characteristics_one, method = "probit", Hess = TRUE)
summary(characteristics_one)
# The lines below are several consecutive interactive sessions; each one starts
# by wiping the workspace, so nothing survives from one session to the next.

# --- Session: read the census-unit shapefile ---
rm(list=ls()) #Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
require(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
# --- Session: filter census units and load the census data ---
rm(list=ls()) #Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
EU_22 = c("AT","BE","BG","HR","CZ","EE","FR","DE","EL","HU","IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES")     # two-letter codes of the 22 countries kept
##################################################
# Dataset 1: Layer of municipal boundaries (CensusUnits)#
##################################################
require(rgdal)
#CensusUnits data
#load("Generated_data/CensusUnits.RData", envir = .GlobalEnv)
# NOTE(review): the workspace was cleared at the top of this session and the
# load() above is commented out, so CensusUnits does not exist here in a fresh
# run; no Country column is created in this session either.
CensusUnits<-CensusUnits[is.element(CensusUnits$Country,EU_22)==TRUE,]
CensusUnits_data<-as.data.frame(CensusUnits)
#####################################
# Dataset 2: Census data (CensusHub)#
#####################################
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
# --- Session: re-read the shapefile ---
rm(list=ls()) #Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
require(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
# --- Session: only defines the country list ---
rm(list=ls()) #Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
EU_22 = c("AT","BE","BG","HR","CZ","EE","FR","DE","EL","HU","IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES")     # two-letter codes of the 22 countries kept
# --- Session: shapefile plus census data ---
rm(list=ls()) #Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
require(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
# NOTE(review): the working directory is still the shapefile folder here, so
# this relative path presumably does not resolve; the same load() is repeated
# after the setwd() below.
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
# Build CensusHub (population and male population per census unit) from the
# Census Hub CSV downloads and save it.
# This pipeline was pasted three times: the first copy never saved anything and
# the second discarded the result of rename(), so it saved a file without the
# CENSUS_ID column. Only the complete, correct run is kept.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
library(readr)
library(plyr) # provides rename(x, c(old = new))

# POPULATION
Population1 <- read_csv("CensusHub/Population_part_1.csv")
Population1 <- Population1[, c("GEO", "TIME", "VALUE")]
Population1$Country <- substr(Population1$GEO, start = 1, stop = 2)
# For some reason Cyprus was included in the list of countries. Additionally,
# Luxembourg and Lithuania were downloaded twice. Drop all three from part 1.
Population1 <- Population1[!(Population1$Country %in% c("LU", "LT", "CY")), ]
Population2 <- read_csv("CensusHub/Population_part_2.csv")
Population2 <- Population2[, c("GEO", "TIME", "VALUE")]
Population2$Country <- substr(Population2$GEO, start = 1, stop = 2)
Population <- rbind(Population1, Population2)
rm(Population1, Population2)
# Delete all regional units with a code of less than 6 characters, since those
# are NUTS3 regions.
Population <- Population[nchar(Population$GEO) > 5, ]
Population <- Population[Population$Country %in% EU_22, ]
# Check that all countries are included (prints the codes that have no rows)
setdiff(EU_22, Population$Country)
summary(Population$VALUE)
Population[is.na(Population$VALUE), ]
Population <- rename(Population, c("VALUE" = "Population"))

# MALE
Male1 <- read_csv("CensusHub/Male_part_1.csv")
Male1 <- Male1[, c("GEO", "VALUE")]
Male1$Country <- substr(Male1$GEO, start = 1, stop = 2)
# Same clean-up as for the population download
Male1 <- Male1[!(Male1$Country %in% c("LU", "LT", "CY")), ]
Male2 <- read_csv("CensusHub/Male_part_2.csv")
Male2 <- Male2[, c("GEO", "VALUE")]
Male2$Country <- substr(Male2$GEO, start = 1, stop = 2)
table(Male2$Country)
Male <- rbind(Male1, Male2)
rm(Male1, Male2)
# Delete all regional units with a code of less than 6 characters, since those
# are NUTS3 regions.
Male <- Male[nchar(Male$GEO) > 5, ]
Male <- Male[Male$Country %in% EU_22, ]
# Check that all countries are included
setdiff(EU_22, Male$Country)
summary(Male$VALUE)
Male[is.na(Male$VALUE), ]
Male <- rename(Male, c("VALUE" = "Male"))
# Country is already carried by Population, so keep only the key and the count
Male <- Male[, c("GEO", "Male")]

CensusHub <- merge(Population, Male, by = "GEO")
# GEO is split at the underscore: the part after the last "_" is the
# municipality ID, the part before the first "_" is the NUTS3 code
CensusHub$MUN_ID <- sub('.*_', '', CensusHub$GEO)
CensusHub$NUTS3 <- sub('_.*', '', CensusHub$GEO)
#Remove municipalities without a proper code
#CensusHub<-CensusHub[CensusHub$MUN_ID!="ZZ",]
#CensusHub<-CensusHub[CensusHub$MUN_ID!="MTZZ",]
#I also drop all observations with municipal IDs which are not numeric
#CensusHub$MUN_ID<-as.numeric(CensusHub$MUN_ID)
#CensusHub<-CensusHub[is.na(CensusHub$MUN_ID)==FALSE,]
# rename() returns a modified copy, so the result has to be assigned back;
# the later merges with the census units join on CENSUS_ID
CensusHub <- rename(CensusHub, c("GEO" = "CENSUS_ID"))
save(CensusHub, file = "C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Generated_data/CensusHub.RData")
# Read the census-unit shapefile, keep the EU_22 countries and save the result.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
library(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
# The first two characters of CENSUS_ID are used as the country code
CensusUnits$Country <- substr(CensusUnits$CENSUS_ID, start = 1, stop = 2)
table(CensusUnits$Country)
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
CensusUnits <- CensusUnits[CensusUnits$Country %in% EU_22, ]
table(CensusUnits$Country)
# save() treats unnamed arguments as names of objects to store, so the target
# path must be passed as file =; the positional attempts failed.
save(CensusUnits, file = "C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Generated_data/CensusUnits.RData")
# Exploratory session: match the census-unit attribute table to CensusHub and
# inspect the CensusHub rows left without a match.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
# Dataset 1: layer of municipal boundaries (CensusUnits)
require(rgdal)
load("Generated_data/CensusUnits.RData", envir = .GlobalEnv)
CensusUnits_data <- as.data.frame(CensusUnits)
# Dataset 2: census data (CensusHub)
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
# The join is run twice, the second time with the defaults spelled out;
# both calls keep only rows present in both tables
CensusHub_Units <- merge(CensusUnits_data, CensusHub, by = c("Country", "CENSUS_ID"))
CensusHub_Units <- merge(
  CensusUnits_data, CensusHub,
  by = c("Country", "CENSUS_ID"), all.x = FALSE, all.y = FALSE
)
# Prints 0 when every CENSUS_ID appears exactly once
length(unique(CensusHub_Units$CENSUS_ID)) - length(CensusHub_Units$CENSUS_ID)
Unmatched <- CensusHub[!(CensusHub$CENSUS_ID %in% CensusHub_Units$CENSUS_ID), ]
# Scratch arithmetic left over from the interactive session
103204 - 6152
View(Unmatched)
table(Unmatched$Country)
View(Unmatched)
# Session: join CensusHub onto the CensusUnits object itself (not a data-frame
# copy) and run the same uniqueness and unmatched checks.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
# Dataset 1: layer of municipal boundaries (CensusUnits)
require(rgdal)
load("Generated_data/CensusUnits.RData", envir = .GlobalEnv)
# Dataset 2: census data (CensusHub)
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
CensusHub_Units <- merge(
  CensusUnits, CensusHub,
  by = c("Country", "CENSUS_ID"), all.x = FALSE, all.y = FALSE
)
# Check that each Census ID comes up only once (prints 0 if so)
length(unique(CensusHub_Units$CENSUS_ID)) - length(CensusHub_Units$CENSUS_ID)
Unmatched <- CensusHub[!(CensusHub$CENSUS_ID %in% CensusHub_Units$CENSUS_ID), ]
rm(CensusUnits, CensusHub, Unmatched)
# Read the NUTS3 income layer, keep the EU_22 countries and save it.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Income")
library(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
Income_NUTS3 <- readOGR(dsn = ".", layer = "Income_NUTS3")
# Country code comes from this layer's own NUTS_ID. The earlier attempt read
# it from CensusUnits, which no longer exists after the workspace is cleared.
Income_NUTS3$Country <- substr(Income_NUTS3$NUTS_ID, start = 1, stop = 2)
table(Income_NUTS3$Country)
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
Income_NUTS3 <- Income_NUTS3[Income_NUTS3$Country %in% EU_22, ]
table(Income_NUTS3$Country)
save(Income_NUTS3, file = "C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Generated_data/Income_NUTS3.RData")
# Session: join census units with CensusHub, then list the income regions
# whose NUTS_ID does not occur among the census units.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
######################################################
# Dataset 1: Layer of census boundaries (CensusUnits)#
######################################################
library(rgdal)
load("Generated_data/CensusUnits.RData", envir = .GlobalEnv)
#####################################
# Dataset 2: Census data (CensusHub)#
#####################################
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
CensusHub_Units <- merge(
  CensusUnits, CensusHub,
  by = c("Country", "CENSUS_ID"), all.x = FALSE, all.y = FALSE
)
# Check that each Census ID comes up only once
length(unique(CensusHub_Units$CENSUS_ID)) - length(CensusHub_Units$CENSUS_ID)
# The unmatched elements come from Greece and one municipality in Germany which
# contains people who obviously could not be attributed to any other
# administrative unit.
Unmatched <- CensusHub[!(CensusHub$CENSUS_ID %in% CensusHub_Units$CENSUS_ID), ]
rm(CensusUnits, CensusHub, Unmatched)
#####################################
# Dataset 3: Income data (Groningen)#
#####################################
load("Generated_data/Income_NUTS3.RData")
# The first five characters of CENSUS_ID are used as the NUTS3 code
CensusHub_Units$NUTS3 <- substr(CensusHub_Units$CENSUS_ID, start = 1, stop = 5)
Income_NUTS3 <- as.data.frame(Income_NUTS3)
# which is a function, so it is called with parentheses; the earlier
# which[...] attempts tried to subset the function itself and failed.
# Row indices first, then the rows themselves.
which(!(as.character(Income_NUTS3$NUTS_ID) %in% CensusHub_Units$NUTS3))
Income_NUTS3[which(!(as.character(Income_NUTS3$NUTS_ID) %in% CensusHub_Units$NUTS3)), ]
# Rebuild Income_NUTS3 with its region key renamed to NUTS3 and save it.
# Two earlier attempts were abandoned: they misspelled the source column as
# NUTS3_ID (the layer's column is NUTS_ID) and one discarded the result of
# rename(). Neither saved anything, so only the working run is kept.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Income")
library(rgdal)
library(plyr) # provides rename(x, c(old = new))
# Read SHAPEFILE.shp from the current working directory (".")
Income_NUTS3 <- readOGR(dsn = ".", layer = "Income_NUTS3")
# Country code = first two characters of NUTS_ID (taken before the rename)
Income_NUTS3$Country <- substr(Income_NUTS3$NUTS_ID, start = 1, stop = 2)
# rename() returns a modified copy, so the result is assigned back
Income_NUTS3 <- rename(Income_NUTS3, c("NUTS_ID" = "NUTS3"))
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
Income_NUTS3 <- Income_NUTS3[Income_NUTS3$Country %in% EU_22, ]
save(Income_NUTS3, file = "C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Generated_data/Income_NUTS3.RData")
# Session: join census units, CensusHub and NUTS3 income, then inspect how
# well INCOME is covered.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/")
# Two-letter codes of the 22 countries kept in the analysis
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
######################################################
# Dataset 1: Layer of census boundaries (CensusUnits)#
######################################################
library(rgdal)
load("Generated_data/CensusUnits.RData", envir = .GlobalEnv)
#####################################
# Dataset 2: Census data (CensusHub)#
#####################################
load("Generated_data/CensusHub.RData", envir = .GlobalEnv)
CensusHub_Units <- merge(
  CensusUnits, CensusHub,
  by = c("Country", "CENSUS_ID"), all.x = FALSE, all.y = FALSE
)
# Check that each Census ID comes up only once
length(unique(CensusHub_Units$CENSUS_ID)) - length(CensusHub_Units$CENSUS_ID)
# The unmatched elements come from Greece and one municipality in Germany which
# contains people who obviously could not be attributed to any other
# administrative unit.
Unmatched <- CensusHub[!(CensusHub$CENSUS_ID %in% CensusHub_Units$CENSUS_ID), ]
rm(CensusUnits, CensusHub, Unmatched)
# The first five characters of CENSUS_ID are used as the NUTS3 code
CensusHub_Units$NUTS3 <- substr(CensusHub_Units$CENSUS_ID, start = 1, stop = 5)
#####################################
# Dataset 3: Income data (Groningen)#
#####################################
load("Generated_data/Income_NUTS3.RData")
Income_NUTS3 <- as.data.frame(Income_NUTS3)
# Income regions with no census unit. The saved file names the key NUTS3 (the
# merge below joins on it); the old NUTS_ID column no longer exists, so
# checking it selected nothing.
Income_NUTS3[!(as.character(Income_NUTS3$NUTS3) %in% CensusHub_Units$NUTS3), ]
CensusHub_Income <- merge(CensusHub_Units, Income_NUTS3, by = c("Country", "NUTS3"))
View(Income_NUTS3)
# Missing income: total count, then by country
sum(is.na(CensusHub_Income$INCOME))
table(CensusHub_Income$Country, is.na(CensusHub_Income$INCOME))
plot(CensusHub_Income)
# Summarize the data.
summary(CensusHub_Income@data)
library(raster)
# Exploratory session: compute the area of each census unit and spot-check a
# few rows of the attribute table.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
# Two-letter codes of the European countries we are interested in
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
# rgdal reads the shapefile from the current working directory (".")
require(rgdal)
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
library(raster)
# Print the coordinate reference system before computing areas
crs(CensusUnits)
# area() result divided by one million to express it in the Area_sqkm column
CensusUnits$Area_sqkm <- area(CensusUnits) / 1e6
# Spot checks of individual rows
CensusUnits@data[305, ]
CensusUnits@data[9005, ]
CensusUnits@data[14005, ]
CensusUnits@data[305, ]
# Final CensusUnits script: read the shapefile, add area and country code,
# keep the EU_22 countries and save.
rm(list = ls()) # Clear working space and set working directory
setwd("C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Administrative Boundaries/CENSUS_UNITS_2011_RG/data/")
# Two-letter codes of the European countries we are interested in
EU_22 <- c(
  "AT", "BE", "BG", "HR", "CZ", "EE", "FR", "DE", "EL", "HU", "IT",
  "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES"
)
# Package used to read in data from ArcGIS
require(rgdal)
# Read SHAPEFILE.shp from the current working directory (".")
CensusUnits <- readOGR(dsn = ".", layer = "CENSUS_UNIT_RG_01M_2011")
# Package used to calculate surface area
library(raster)
# Make sure to check that you are using a projection which is in lonlat format.
crs(CensusUnits)
# Surface area, stored in Area_sqkm
CensusUnits$Area_sqkm <- area(CensusUnits) / 1e6
# Checked against Google: Deutschkreuz (CENSUS_ID AT111_10801) comes out at
# 34.3 sqkm versus 34.1, and Otterthal (AT122_31820) at 5.9 versus 6.16.
# The first 2 characters of the CENSUS IDs correspond to each country's code;
# they are stored in a separate variable named Country.
CensusUnits$Country <- substr(CensusUnits$CENSUS_ID, start = 1, stop = 2)
# Select only those countries which are relevant for our analysis.
CensusUnits <- CensusUnits[CensusUnits$Country %in% EU_22, ]
# Save the data as an RData file.
save(CensusUnits, file = "C:/Users/byontche/Dropbox/Notaries_in_Europe/Data/Generated_data/CensusUnits.RData")
